- R package
igraph- Get network from files (edgelist, matrix, dataframe)
- Visualization
- Plotting parameters
- Layouts
- Network and node descriptions
1/14/2019
igraph
igraph#install.packages("igraph")
#install.packages("igraphdata")
library(igraph)
library(igraphdata)
#install.packages(dplyr)
#install.packages(tidyr)
#install.packages(stringr)
graph_from_adjacency_matrix()graph_from_edgelist()graph_from_data_frame()graph_from_adjacency_matrix()Used for creating a small matrix.
The networks in real world are usually large sparse matrix and stored as a edgelist.
Binary matrix:
set.seed(2) #sample from Bernoulli distribution with sample size 100. adjm <- matrix(sample(0:1, 100, replace=TRUE, prob=c(0.9,0.1)), nc=10) adjm
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] ## [1,] 0 0 0 0 1 0 0 0 0 1 ## [2,] 0 0 0 0 0 0 0 0 0 0 ## [3,] 0 0 0 0 0 0 0 0 0 0 ## [4,] 0 0 0 0 0 1 0 0 0 0 ## [5,] 1 0 0 0 1 0 0 0 0 0 ## [6,] 1 0 0 0 0 0 0 0 0 0 ## [7,] 0 1 0 0 1 0 0 0 1 0 ## [8,] 0 0 0 0 0 1 0 0 0 0 ## [9,] 0 0 1 0 0 0 0 0 0 0 ## [10,] 0 0 0 0 0 0 0 0 0 0
g1 <- graph_from_adjacency_matrix( adjm ) set.seed(1) plot(g1)
#default is directed g2 <- graph_from_adjacency_matrix( adjm ,mode = "undirected") set.seed(1) plot(g2)
#get rid of the self-loop (in real-world maybe self-loop does not make any sense) g3 <- graph_from_adjacency_matrix( adjm ,mode = "undirected",diag = FALSE) set.seed(1) plot(g3)
Sparse matrix:
adjms=g1[] adjms
## 10 x 10 sparse Matrix of class "dgCMatrix" ## ## [1,] . . . . 1 . . . . 1 ## [2,] . . . . . . . . . . ## [3,] . . . . . . . . . . ## [4,] . . . . . 1 . . . . ## [5,] 1 . . . 1 . . . . . ## [6,] 1 . . . . . . . . . ## [7,] . 1 . . 1 . . . 1 . ## [8,] . . . . . 1 . . . . ## [9,] . . 1 . . . . . . . ## [10,] . . . . . . . . . .
g4=graph_from_adjacency_matrix(adjms) set.seed(1) plot(g4)
Weighted matrix
set.seed(1)
adjmw <- matrix(sample(0:5, 100, replace=TRUE,
prob=c(0.9,0.02,0.02,0.02,0.02,0.02)), nc=10)
adjmw
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] ## [1,] 0 0 3 0 0 0 2 0 0 0 ## [2,] 0 0 0 0 0 0 0 0 0 0 ## [3,] 0 0 0 0 0 0 0 0 0 0 ## [4,] 2 0 0 0 0 0 0 0 0 0 ## [5,] 0 0 0 0 0 0 0 0 0 0 ## [6,] 0 0 0 0 0 0 0 0 0 0 ## [7,] 4 0 0 0 0 0 0 0 0 0 ## [8,] 0 1 0 0 0 0 0 0 0 0 ## [9,] 0 0 0 0 0 0 0 0 0 0 ## [10,] 0 0 0 0 0 0 0 5 0 0
g5 <- graph_from_adjacency_matrix(adjmw, weighted=TRUE) set.seed(1) plot(g5)
g5
## IGRAPH 1d47339 D-W- 10 6 -- ## + attr: weight (e/n) ## + edges from 1d47339: ## [1] 1->3 1->7 4->1 7->1 8->2 10->8
E(g5)$weight
## [1] 3 2 2 4 1 5
Named matrix
rownames(adjmw)=LETTERS[1:10] colnames(adjmw)=LETTERS[1:10] g6 <- graph_from_adjacency_matrix(adjmw, weighted=TRUE) set.seed(1) plot(g6)
graph_from_edgelist()Most network datasets are stored as edgelists. Input is two-column matrix with each row defining one edge.
gotdf=read.csv("gotstark_lannister.csv",stringsAsFactors = FALSE)
head(gotdf,5)
## X Source Target Type weight book source.family ## 1 1 Arya-Stark Benjen-Stark Undirected 3 1 Stark ## 2 2 Arya-Stark Bran-Stark Undirected 14 1 Stark ## 3 3 Arya-Stark Catelyn-Stark Undirected 5 1 Stark ## 4 4 Arya-Stark Cersei-Lannister Undirected 12 1 Stark ## 5 5 Arya-Stark Desmond Undirected 3 1 Stark ## target.family ## 1 Stark ## 2 Stark ## 3 Stark ## 4 Lannister ## 5 <NA>
library(dplyr) library(tidyr)
gotdf.el=gotdf%>%select(Source,Target,weight)%>% group_by(Source,Target)%>% expand(edge=c(1:weight))%>%select(-edge) head(gotdf.el)
## # A tibble: 6 x 2 ## # Groups: Source, Target [2] ## Source Target ## <chr> <chr> ## 1 Arya-Stark Benjen-Stark ## 2 Arya-Stark Benjen-Stark ## 3 Arya-Stark Benjen-Stark ## 4 Arya-Stark Bran-Stark ## 5 Arya-Stark Bran-Stark ## 6 Arya-Stark Bran-Stark
## input need to be a matrix got1=graph_from_edgelist(gotdf.el%>%as.matrix(),directed = FALSE) got1
## IGRAPH 011604f UN-- 99 3374 -- ## + attr: name (v/c) ## + edges from 011604f (vertex names): ## [1] Arya-Stark--Benjen-Stark Arya-Stark--Benjen-Stark ## [3] Arya-Stark--Benjen-Stark Arya-Stark--Bran-Stark ## [5] Arya-Stark--Bran-Stark Arya-Stark--Bran-Stark ## [7] Arya-Stark--Bran-Stark Arya-Stark--Bran-Stark ## [9] Arya-Stark--Bran-Stark Arya-Stark--Bran-Stark ## [11] Arya-Stark--Bran-Stark Arya-Stark--Bran-Stark ## [13] Arya-Stark--Bran-Stark Arya-Stark--Bran-Stark ## [15] Arya-Stark--Bran-Stark Arya-Stark--Bran-Stark ## + ... omitted several edges
plot(got1,edge.arrow.size=.5, vertex.color="gold", vertex.size=3,
vertex.frame.color="gray", vertex.label.color="black",
vertex.label.cex=.5, vertex.label.dist=2, edge.curved=0.2)
el <- matrix( c("foo", "bar","foo","bar", "bar", "foobar"), nc = 2, byrow = TRUE)
graph_from_edgelist(el)%>%plot()
E(got1)$weight=rep(1,ecount(got1))
got1s <- igraph::simplify( got1, remove.multiple = T, remove.loops = F,
edge.attr.comb=c(weight="sum"))
plot(got1s,edge.arrow.size=.5, vertex.color="gold", vertex.size=3,
vertex.frame.color="gray", vertex.label.color="black",
vertex.label.cex=.5, vertex.label.dist=2, edge.curved=0.5,layout=layout_with_lgl)
library(stringr)
nameshort=V(got1s)$name%>% str_split(.,"-",simplify = TRUE)%>% .[,1] V(got1s)$name[1:3]
## [1] "Arya-Stark" "Benjen-Stark" "Bran-Stark"
nameshort[1:3]
## [1] "Arya" "Benjen" "Bran"
V(got1s)$name=nameshort
plot(got1s,edge.arrow.size=.5, vertex.color="gold", vertex.size=3,
vertex.frame.color="gray", vertex.label.color="black",
vertex.label.cex=.5, vertex.label.dist=2, edge.curved=0.5,layout=layout_with_lgl)
graph_from_data_frame()Most common and useful.
d: a data frame containing a symbolic edge list in the first two columns. Additional columns are considered as edge attributes.
vertices: A data frame with vertex metadata
head(gotdf,5)
## X Source Target Type weight book source.family ## 1 1 Arya-Stark Benjen-Stark Undirected 3 1 Stark ## 2 2 Arya-Stark Bran-Stark Undirected 14 1 Stark ## 3 3 Arya-Stark Catelyn-Stark Undirected 5 1 Stark ## 4 4 Arya-Stark Cersei-Lannister Undirected 12 1 Stark ## 5 5 Arya-Stark Desmond Undirected 3 1 Stark ## target.family ## 1 Stark ## 2 Stark ## 3 Stark ## 4 Lannister ## 5 <NA>
gotdf=gotdf%>%select(-X) got2=graph_from_data_frame(d=gotdf,directed = FALSE) got2
## IGRAPH 436d7b1 UNW- 99 238 -- ## + attr: name (v/c), Type (e/c), weight (e/n), book (e/n), ## | source.family (e/c), target.family (e/c) ## + edges from 436d7b1 (vertex names): ## [1] Arya-Stark--Benjen-Stark Arya-Stark--Bran-Stark ## [3] Arya-Stark--Catelyn-Stark Arya-Stark--Cersei-Lannister ## [5] Arya-Stark--Desmond Arya-Stark--Eddard-Stark ## [7] Arya-Stark--Ilyn-Payne Arya-Stark--Jeyne-Poole ## [9] Arya-Stark--Joffrey-Baratheon Arya-Stark--Jon-Snow ## [11] Arya-Stark--Jory-Cassel Arya-Stark--Meryn-Trant ## [13] Arya-Stark--Mordane Arya-Stark--Mycah ## + ... omitted several edges
plot(got2,edge.arrow.size=.5, vertex.color="gold", vertex.size=3,
vertex.frame.color="gray", vertex.label.color="black",
vertex.label.cex=.5, vertex.label.dist=2, edge.curved=0.5,layout=layout_with_lgl)
igraph::as_data_frame(got2)%>%head(2)
## from to Type weight book source.family ## 1 Arya-Stark Benjen-Stark Undirected 3 1 Stark ## 2 Arya-Stark Bran-Stark Undirected 14 1 Stark ## target.family ## 1 Stark ## 2 Stark
as_adjacency_matrix(got2)%>%head(2)
## [1] 0 1
as_edgelist(got2)%>%head(2)
## [,1] [,2] ## [1,] "Arya-Stark" "Benjen-Stark" ## [2,] "Arya-Stark" "Bran-Stark"
read_graph, write_graph## store in txt or csv or others write_graph(graph = got2,file = "g.txt",format = "edgelist") read_graph(file = "g.txt",format = "edgelist",directed=F)
## IGRAPH 6963953 U--- 99 238 -- ## + edges from 6963953: ## [1] 1-- 2 1-- 3 1-- 5 1-- 6 1-- 7 1--12 1--13 1--14 1--17 1--18 1--19 ## [12] 1--20 1--21 1--22 1--23 1--24 1--25 1--26 1--27 1--28 1--29 1--30 ## [23] 1--31 1--32 1--33 1--34 1--35 2-- 3 2-- 6 2--13 2--15 2--21 2--28 ## [34] 2--35 2--36 2--37 2--38 2--39 2--40 2--41 3-- 5 3-- 6 3-- 7 3--12 ## [45] 3--13 3--14 3--15 3--20 3--21 3--22 3--27 3--28 3--29 3--33 3--35 ## [56] 3--37 3--38 3--40 3--42 3--43 3--44 3--45 3--46 3--47 3--48 3--49 ## [67] 3--50 3--51 3--52 3--53 4-- 7 4--11 4--27 4--28 4--52 5-- 6 5-- 7 ## [78] 5-- 8 5--12 5--13 5--14 5--15 5--16 5--20 5--21 5--27 5--28 5--29 ## [89] 5--38 5--40 5--43 5--46 5--51 5--54 5--55 5--56 5--57 5--58 5--59 ## + ... omitted several edges
## store the whole graph write_graph(got2,file = "gg",format = "pajek") read_graph(file="gg",format="pajek")
## IGRAPH fb32cbf U-W- 99 238 -- ## + attr: weight (e/n) ## + edges from fb32cbf: ## [1] 1-- 2 1-- 3 1-- 5 1-- 6 1--17 1-- 7 1--18 1--19 1--20 1--21 1--22 ## [12] 1--23 1--24 1--25 1--26 1--27 1--12 1--13 1--28 1--29 1--30 1--14 ## [23] 1--31 1--32 1--33 1--34 1--35 2-- 3 2-- 6 2--36 2--37 2--21 2--38 ## [34] 2--39 2--13 2--28 2--40 2--15 2--41 2--35 3-- 5 3-- 6 3-- 7 3--42 ## [45] 3--43 3--44 3--45 3--37 3--20 3--46 3--21 3--22 3--47 3--38 3--48 ## [56] 3--49 3--27 3--50 3--51 3--52 3--12 3--13 3--28 3--29 3--14 3--53 ## [67] 3--40 3--33 3--15 3--35 4-- 7 4--11 4--27 4--52 4--28 5-- 6 5--54 ## [78] 5--55 5-- 7 5--56 5--57 5--43 5--58 5-- 8 5--20 5--46 5--21 5--59 ## + ... omitted several edges
got2
## IGRAPH 436d7b1 UNW- 99 238 -- ## + attr: name (v/c), Type (e/c), weight (e/n), book (e/n), ## | source.family (e/c), target.family (e/c) ## + edges from 436d7b1 (vertex names): ## [1] Arya-Stark--Benjen-Stark Arya-Stark--Bran-Stark ## [3] Arya-Stark--Catelyn-Stark Arya-Stark--Cersei-Lannister ## [5] Arya-Stark--Desmond Arya-Stark--Eddard-Stark ## [7] Arya-Stark--Ilyn-Payne Arya-Stark--Jeyne-Poole ## [9] Arya-Stark--Joffrey-Baratheon Arya-Stark--Jon-Snow ## [11] Arya-Stark--Jory-Cassel Arya-Stark--Meryn-Trant ## [13] Arya-Stark--Mordane Arya-Stark--Mycah ## + ... omitted several edges
?igraph.plotting
plot(got2, vertex.color="gold", vertex.size=3,
vertex.frame.color="gray", vertex.label.color="black",
vertex.label.cex=.5, vertex.label.dist=2, edge.curved=0.5,layout=layout_with_lgl)
## store the fullname fullnames=V(got2)$name fullnames[1:3]
## [1] "Arya-Stark" "Benjen-Stark" "Bran-Stark"
#get family name
familynames=fullnames%>%str_split("-",simplify = TRUE)%>%.[,2]
familynames[familynames==""]="None"
familynames[familynames=="(guard)"]="None"
# add vertices attributes
V(got2)$familyname=familynames
V(got2)$fullname=fullnames
V(got2)$name=nameshort # first name
Set colors and legend.
vcol=V(got2)$familyname
vcol[(vcol!="Stark")&(vcol!="Lannister")]="gray50"
vcol[vcol=="Stark"]="tomato"
vcol[vcol=="Lannister"]="gold"
V(got2)$color=vcol
V(got2)$size=degree(got2)%>%log()*4
E(got2)$width=E(got2)$weight%>%log()/2
plot(got2, vertex.label.color="black",
vertex.label.cex=.5, vertex.label.dist=2, edge.curved=0.5,layout=layout_with_kk)
legend("right", legend = c("Stark","Lannister","Other"), pch=21,
col=c("tomato","gold","gray50"), pt.bg=c("tomato","gold","gray50"), pt.cex=1, cex=.8, bty="n", ncol=1)
Plot only labels of the nodes
Force-directed layouts: suitable for general, small to medium sized graphs. (computational complexity; based on physical analogies)
For large graphs:
#layout_with_dh
plot(got2, vertex.label.color="black",
vertex.label.cex=.5,vertex.label.dist=0.2, edge.curved=0.5,layout=layout_with_dh)
legend("right", legend = c("Stark","Lannister","Other"), pch=21,
col=c("tomato","gold","gray50"), pt.bg=c("tomato","gold","gray50"), pt.cex=1, cex=.8, bty="n", ncol=1)
Selecting a layout automatically
plot(got2, vertex.label.color="black",
vertex.label.cex=.5,vertex.label.dist=0.2, edge.curved=0.5,layout=layout.auto(got2))
Without label and color the edge.
set.seed(2)
plot(got2, vertex.shape="none",vertex.label.color="black",
vertex.label.cex=.5,vertex.label.dist=0.2, edge.curved=0.5,layout=layout_with_dh)
##color the edge got2
## IGRAPH 436d7b1 UNW- 99 238 -- ## + attr: name (v/c), familyname (v/c), fullname (v/c), color (v/c), ## | size (v/n), Type (e/c), weight (e/n), book (e/n), source.family ## | (e/c), target.family (e/c), width (e/n) ## + edges from 436d7b1 (vertex names): ## [1] Arya--Benjen Arya--Bran Arya--Cersei Arya--Desmond Arya--Petyr ## [6] Arya--Eddard Arya--Rickon Arya--Robb Arya--Robert Arya--Rodrik ## [11] Arya--Sandor Arya--Sansa Arya--Syrio Arya--Tomard Arya--Tommen ## [16] Arya--Vayon Arya--Jory Arya--Meryn Arya--Yoren Arya--Jaremy ## [21] Arya--Jeor Arya--Mordane Arya--Luwin Arya--Mance Arya--Theon ## [26] Arya--Tyrion Arya--Waymar ## + ... omitted several edges
ecol=rep("gray50",ecount(got2))
ecol[E(got2)$source.family=="Stark"]="tomato"
ecol[E(got2)$source.family=="Lannister"]="gold"
ecol[(ecol=="tomato")&(E(got2)$target.family=="Lannister")&(!is.na(E(got2)$target.family))]="orange"
ecol[(ecol=="gold")&(E(got2)$target.family=="Stark")&(!is.na(E(got2)$target.family))]="orange"
set.seed(2)
plot(got2, vertex.shape="none",vertex.label.color="black", edge.color=ecol,
vertex.label.cex=.5,vertex.label.dist=0.2, edge.curved=0.5,layout=layout_with_dh)
legend("right", legend = c("Stark","Lannister","Stark-Lannister","Other"),
col=c("tomato","gold","orange","gray50"), lty=rep(1,4), cex=.8, bty="n", ncol=1)
Different runs will result in slightly different configurations. Saving the layout or set.seed allows us to get the exact same result multiple times, which can be helpful if you want to plot the time evolution of a graph, or different relationships – and want nodes to stay in the same place in multiple plots.
set.seed(1)
l=layout_with_dh(got2)
plot(got2, vertex.shape="none",vertex.label.color="black",
vertex.label.cex=.5,vertex.label.dist=0.2, edge.curved=0.5,layout=l)
rescale
norm_coordsrescale=Flayout=l*2l=layout_with_fr(got2)
l <- norm_coords(l, ymin=-1, ymax=1, xmin=-1, xmax=1) #default -- scaled
plot(got2, vertex.shape="none",vertex.label.color="black",
vertex.label.cex=.5,vertex.label.dist=0.2, edge.curved=0.5,layout=l,rescale=F)
Will introduce interactive r packages next time.
par(mfrow=c(2,2), mar=c(0,0,0,0))
plot(got2, vertex.shape="none",vertex.label.color="black",
vertex.label.cex=.5,vertex.label.dist=0.2, edge.curved=0.5,layout=l*0.5,rescale=F)
plot(got2, vertex.shape="none",vertex.label.color="black",
vertex.label.cex=.5,vertex.label.dist=0.2, edge.curved=0.5,layout=l*0.8,rescale=F)
plot(got2, vertex.shape="none",vertex.label.color="black",
vertex.label.cex=.5,vertex.label.dist=0.2, edge.curved=0.5,layout=l*1,rescale=F)
plot(got2, vertex.shape="none",vertex.label.color="black",
vertex.label.cex=.5,vertex.label.dist=0.2, edge.curved=0.5,layout=l*2,rescale=F)
#dev.off()
edge_densitydegreecentr_degreecloseness, centr_cloeigen_centrality, centr_eigenbetweenness, edge_betweenness, centr_betwThe proportion of present edges from all possible ties.
edge_density(got2, loops=F)
## [1] 0.04906205
ecount(got2)/(vcount(got2)*(vcount(got2)-1))*2 #for an undirected network
## [1] 0.04906205
'degree' has a mode of 'in' for in-degree, 'out' for out-degree, and 'all' or 'total' for total degree.
Notice the graph is undirected. So there is no difference under different parameter setting.
deg <- degree(got2, mode="all") hist(deg, breaks=1:vcount(got2)-1, main="Histogram of node degree")
deg.dist <- degree_distribution(got2, cumulative=T, mode="all")
plot( x=0:max(deg), y=1-deg.dist, pch=19, cex=1.2, col="orange",
xlab="Degree", ylab="Cumulative Frequency")
Who is the most important character?
Degree (number of ties).
Normalization should be the max degree the network can get
degree(got2, mode="in",loops = F)%>%sort(decreasing = TRUE)%>%.[1:5]
## Eddard Cersei Bran Arya Desmond ## 56 41 32 27 27
#Notice this is undirected network, the choice of mode does not matter centr_degree(got2, mode="in", normalized=T,loops = F)$res%>%sort(decreasing = TRUE)%>%.[1:5]
## [1] 56 41 32 27 27
centr_degree(got2, mode="all", normalized=T,loops = F)$res%>%sort(decreasing = TRUE)%>%.[1:5]
## [1] 56 41 32 27 27
#Pay attention to whether allowing self-loop or not # Normalization may differ due to the setting centr_degree(got2, mode="all", normalized=T,loops = F)$theoretical_max
## [1] 9506
centr_degree(got2, mode="in", normalized=T,loops = F)$theoretical_max
## [1] 9506
centr_degree(got2, mode="in", normalized=T,loops = T)$theoretical_max
## [1] 9702
Closeness (centrality based on distance to others in the graph) Inverse of the node's average geodesic distance to others in the network
#whether to include weight or not #If a graph has edge attribute weight, the weight will be automatically took into consideration closeness(got2, mode="all", weights=NA) %>%sort(decreasing = TRUE)%>%.[1:5]
## Eddard Cersei Bran Arya Desmond ## 0.006993007 0.006329114 0.006097561 0.005882353 0.005847953
closeness(got2, mode="all")%>%sort(decreasing = TRUE)%>%.[1:5]
## Eddard Cersei Donnel Bran Arya ## 0.0010245902 0.0010141988 0.0010080645 0.0010030090 0.0009852217
centr_clo(got2, mode="all", normalized=T)$res %>%sort(decreasing = TRUE)%>%.[1:5]
## [1] 0.6853147 0.6202532 0.5975610 0.5764706 0.5730994
Eigenvector (centrality proportional to the sum of connection centralities) Values of the first eigenvector of the graph adjacency matrix
eigen_centrality(got2, directed=F, weights=NA)$vector%>%sort(decreasing = TRUE)%>%.[1:5]
## Eddard Cersei Bran Desmond Arya ## 1.0000000 0.8163499 0.7410532 0.7276696 0.6740883
eigen_centrality(got2, directed=F)$vector%>%sort(decreasing = TRUE)%>%.[1:5]
## Eddard Yoren Desmond Cersei Vayon ## 1.0000000 0.8538947 0.4281666 0.3352669 0.2441671
centr_eigen(got2, directed=F, normalized=T) $vector%>%sort(decreasing = TRUE)%>%.[1:5]
## [1] 1.0000000 0.8163499 0.7410532 0.7276696 0.6740883
Betweenness (centrality based on a broker position connecting others) (Number of geodesics that pass through the node or the edge)
betweenness(got2, directed=F, weights=NA)%>%sort(decreasing = TRUE)%>%.[1:5]
## Eddard Cersei Bran Arya Meryn ## 2155.2656 1554.1678 915.6561 510.5637 366.8074
betweenness(got2, directed=F)%>%sort(decreasing = TRUE)%>%.[1:5]
## Eddard Cersei Bran Benjen Arya ## 1835.5000 1483.2500 1024.8571 694.4762 689.5833
edge_betweenness(got2, directed=F, weights=NA)%>%sort(decreasing = TRUE)%>%.[1:5]
## [1] 426.4643 271.6982 198.3379 150.0371 133.8635
centr_betw(got2, directed=F, normalized=T)$res%>%sort(decreasing = TRUE)%>%.[1:5]
## [1] 2155.2656 1554.1678 915.6561 510.5637 366.8074